#  Explore topic 13e
#  First load all of the functions we will use
source( "../gnrnd5.R")
source( "../gnrnd4.R")
source( "../pop_sd.R")
source( "../ci_stddev.R")

#  Topic 13e looks at generating a confidence
#  interval for a population standard deviation

#
#  Let us generate a population
gnrnd5(145898499904, 432003526)
#  put the population into big_pop
big_pop <- L1
# We can look at some of the population
head( big_pop, 20)

#   Then, our interest is to get a 97% confidence interval
#   for the standard deviation of the population.

#  To do this we will take a sample
#  of a certain size 
samp_size <- 43

#  then, just so that we can all get the same
#  sample, generate the index values for 
#  a sample of that size
key1 <- 550910001+ (samp_size-1)*100
gnrnd4(key1, 500000001)
L1
this_sample <- big_pop[ L1 ]
#  look at our sample
this_sample

#  we can find the sample standard deviation

samp_sd <- sd( this_sample )
samp_sd



# Now,  we will use the sample standard deviation as 
# our point estimate.  We know that the distribution
# of the ratio of (samp_size-1)*samp_sd^2 to 
# sigma^2, where sigma is the standard deviation of 
# the population will be a chi-squared 
# distribution with samp_size-1 degrees of freedom.

# Therefore, for a 97% confidence interval we want to find
# the x_left value that has (1-0.97)/2 = 0.015 as the 
# area to the left of that value, for samp_size-1 
# degrees of freedom and the value x_right that has
# 0.015 as the area to the right of that value.
x_left <- qchisq( 0.015, samp_size-1 )
x_left
x_right <- qchisq( 0.015, samp_size-1, lower.tail=FALSE )
x_right
# then the left side of the confidence interval
# is 
left_side <- sqrt((samp_size-1)*samp_sd^2/x_right)
# and the right side of the confidence interval
# is 
right_side <- sqrt((samp_size-1)*samp_sd^2/x_left)
#
left_side
right_side
#

###  of course all of this could be done via
#    our ci_known function
ci_stddev(samp_size, samp_sd, 0.97)

##################################
#  we could try this at a different confidence
#  level.  Just alter the confidence level and
#  then run the subsequent lines, or just skip
#  down to line 72 and get the new values
####################################

#  If we express the confidence level as a 
#  percent then we say that that percent of the
#  confidence intervals that we generate 
#  using this methodology will contain the 
#  true standard deviation.  That means, that at this point
#  in running the script, I do not know if the
#  97% confidence interval that we generated, 
#  namely (35.657, 57.607 ) does or does not
#  contain the true population standard deviation.  
#
#  Let us find the true standard deviation and see if it is
#  in the interval.

sigma <- pop_sd( big_pop )
sigma
#   yes it is!  

#  This has been an illustration, but let us 
#  go through the process 10000 times and 
#  see how many intervals that we generate this
#  way contain the true mean

#  first reset the confidence level and
#  sample size  just in case we want to change
#  them later

conf_level <- 0.97
samp_size <- 43

L3 <- 1:10000
for( i in 1:10000 ) {
  this_sample <- sample( big_pop, samp_size )
  samp_sd  <- sd( this_sample )
  this_ci <- ci_stddev( samp_size,
                        samp_sd,
                        conf_level)
  if( this_ci[1] <= sigma &
      sigma <= this_ci[2] ) {
    L3[i] = "hit"}
  else {
    L3[i] = "missed"}
}
# see how we did
table( L3 )

#########
#  if we want we can do this again and we
#  can even change the values in lines 107 
#  and/or 108 if we want.